**Housekeeping
* Reset Stata's state and start a fresh plain-text log for this script.
clear all
* cap swallows the error raised when no log is currently open
cap log close
* The path is quoted so that a rep_root containing spaces does not split the filename
log using "${rep_root}/logs/dyads.log", text replace
set more off

* Read the CR status extract and park it in a tempfile, then read the INFO
* status extract and stack the CR rows underneath it.
insheet using "${rep_root}/data/FOIA/CLEAR CR Status Data.csv"
tempfile cr_status
save `cr_status'

insheet using "${rep_root}/data/FOIA/CLEAR INFO Status Data.csv", clear
append using `cr_status'

* Distribution of status values in the combined file
tabulate status

  

//order chronologically
* Split the clock string into its AM/PM marker (last two characters), the
* hour (text before the colon) and the minute (two characters after the colon).
generate ampm = substr(status_time, -2, 2)
tabulate ampm
generate hr = substr(status_time, 1, strpos(status_time, ":") - 1)
tabulate hr
generate min = substr(status_time, strpos(status_time, ":") + 1, 2)
destring hr min, replace

* Convert the 12-hour clock to a 24-hour hour: 12 AM becomes 0, 1-11 PM become 13-23
replace hr = 0 if ampm=="AM" & hr==12
replace hr = hr + 12 if ampm=="PM" & hr<12

* Calendar pieces of the status date; 2025 is the top year used to resolve
* two-digit years in the MDY string
generate status_time_str = status_date + " " + status_time
generate status_dt = date(status_date, "MDY", 2025)
generate status_yr = year(status_dt)
generate status_month = month(status_dt)
generate dow = dow(status_dt)

* Order every complaint's status rows by date, then hour, then minute
sort cr_number status_dt hr min

**Get intake time for each case
* The earliest status row of each cr_number (by date, hour, minute) is taken
* as the intake record. Work on a preserved copy so the full status history
* is back in memory after the restore.
preserve
bysort cr_number (status_dt hr min): keep if _n==1

keep cr_number status_dt hr min dow

* Rename the timestamp pieces so they do not collide with the per-row
* versions when merged back
rename status_dt intake_dt
foreach v in hr min dow {
	rename `v' `v'_intake
}

tempfile intake
save `intake'
restore
 


* Build a lookup from complaint (cr_id) to the staff member recorded on its
* first "PENDING INVESTIGATIVE REVIEW" status row entered under an
* investigator position.

* Attach each complaint's intake timestamp to all of its status rows
sort cr_number
merge m:1 cr_number using `intake', nogen

* Flag rows whose position is one of the listed investigator titles and keep
* only those that are in the pending-investigative-review status
gen inv = inlist(position, "INVESTIGATOR", "INVESTIGATOR I COPA", "INVESTIGATOR II COPA", "INVESTIGATOR III COPA")
keep if inv==1 & status=="PENDING INVESTIGATIVE REVIEW"

* Keep the earliest qualifying row per complaint. The intake_* variables are
* constant within cr_number after the merge, so sorting on them left the
* within-complaint order (and therefore the row kept) arbitrary from run to
* run. Sort on the row's own timestamp instead; created_by is a final
* tiebreak so rows sharing the same minute still give a reproducible choice.
sort cr_number status_dt hr min created_by
by cr_number: keep if _n==1

* Numeric id for created_by (egen group leaves it missing when created_by is missing)
egen nonsup_inv_id = group(created_by)

* Save the lookup under the key name used by the case-level data merged later
keep cr_number nonsup_inv_id
rename cr_number cr_id
tempfile nonsup_inv_id
save `nonsup_inv_id'
clear

* Minimum number of cases an investigator must have to stay in the sample
local cutoff 50
local case_lb `cutoff'

* data_restrict.do is not shown here; presumably it loads the case-level
* analysis data (inv_id, intake_yr and cr_id are used below) - TODO confirm.
* NOTE(review): local macros defined in this file are not visible inside a
* do-file started with do, so data_restrict.do cannot read cutoff or case_lb.
do data_restrict.do

**Drop investigators without enough cases
tabulate inv_id
bysort inv_id: generate inv_ct = _N
keep if inv_ct >= `case_lb'
tabulate inv_id intake_yr

* Attach the non-supervisor investigator id; only cases found in both files are kept
sort cr_id
merge m:1 cr_id using `nonsup_inv_id', keep(match) nogenerate


**Find non-supervisor investigators in the previous year
* Build, for every (intake_yr, inv_id), a wide list of the distinct
* nonsup_inv_id values seen with that inv_id, then shift the year forward so
* the list lines up with the following year's cases.
preserve
keep intake_yr inv_id nonsup_inv_id

* One row per distinct (year, investigator, non-supervisor investigator)
bysort intake_yr inv_id nonsup_inv_id: keep if _n==1
rename nonsup_inv_id prev_nonsup_inv

* Number the partners within each (year, investigator); the largest count
* sets how many wide columns exist and is reused by the matching loop later
bysort intake_yr inv_id (prev_nonsup_inv): generate idx = _n
summarize idx
local idx_max = r(max)

reshape wide prev_nonsup_inv, i(intake_yr inv_id) j(idx)

* Shift to the following year; a shifted value of 2009 is relabelled 2014,
* so the 2008 lists are matched to 2014 cases
replace intake_yr = intake_yr + 1
replace intake_yr = 2014 if intake_yr==2009

tempfile prev_nonsup_inv
save `prev_nonsup_inv'
restore

**Calculate the share of cases whose non-supervisor investigator was already paired with the same inv_id in the prior year
* Bring in the prior-year partner list; prev_merge==1 marks cases whose
* (intake_yr, inv_id) has no prior-year list at all.
sort intake_yr inv_id
merge m:1 intake_yr inv_id using `prev_nonsup_inv', keep(1 3) gen(prev_merge)

* Flag a case as continuous when its nonsup_inv_id appears in any of the
* prior-year slots. Stata treats missing==missing as true, and the unused
* reshape-wide slots are missing, so a missing nonsup_inv_id would otherwise
* be flagged as a match; the !missing() guard prevents that.
gen nonsup_inv_continuity = 0
forvalues i = 1/`idx_max' {
	replace nonsup_inv_continuity = 1 if nonsup_inv_id==prev_nonsup_inv`i' & !missing(nonsup_inv_id)
}
* Continuity is undefined when there is no prior-year list to compare against
replace nonsup_inv_continuity = . if prev_merge==1

tab intake_yr nonsup_inv_continuity, m

* Collapse to one row per intake year holding the mean continuity flag
* (missing flags are ignored by the mean), list it, and save it to disk.
* Done on a preserved copy so the case-level data is back after the restore.
preserve
sort intake_yr
by intake_yr: egen avg_continuity = mean(nonsup_inv_continuity)
by intake_yr: keep if _n==1

list intake_yr avg_continuity

keep intake_yr avg_continuity
* The path is quoted so that a rep_root containing spaces does not split the filename
save "${rep_root}/data/dyad_continuity", replace
restore


**Stringency analysis
* Covariate list for the residualising regression run further down. The
* wildcard d_mo_* also picks up the d_mo_yr_* month-by-year dummies created
* below; vic_old and the d_yr_* dummies are created but not part of this list.
local covars vic_female_any vic_white_any acc_white acc_male acc_old cat_3 cat_4 d_mo_* d_dist_* arrests_pre complaints_pre force_pre any_prior_complaint tenure

* Outcome: 1 unless the finding code is "NO AFFIDAVIT"
gen treat_affidavit = finding_cd != "NO AFFIDAVIT"

* Age indicators (35 or older). Missing values compare as larger than any
* number in Stata, so every input is checked with !missing(); this now
* includes the complaint date, whose absence previously set vic_old to 1.
gen vic_old = (year(inc_complaint_dt_1) - vic_byr_oldest)>=35 & !missing(vic_byr_oldest, inc_complaint_dt_1)
gen acc_old = acc_age>=35 & !missing(acc_age)

* Indicators for acc_cat values 3 and 4
gen cat_3 = acc_cat==3
gen cat_4 = acc_cat==4

* District dummies for inc_district_1 values 1 through 25
forvalues i = 1/25 {
	gen d_dist_`i' = inc_district_1==`i'
}

* Year dummies and month-by-year dummies for the two intake-year ranges
* covered here (2006-2008 and 2014-2018)
foreach yr of numlist 2006/2008 2014/2018 {
	gen d_yr_`yr' = intake_yr==`yr'

	forvalues m = 1/12 {
		gen d_mo_yr_`m'_`yr' = intake_yr==`yr' & intake_mo==`m'
	}
}

* Add the pre-period outcome covariates; only rows matched on (cr_id, acc_id)
* are kept. The path is quoted so that a rep_root containing spaces does not
* split the filename.
merge 1:1 cr_id acc_id using "${rep_root}/data/outcomes", keep(3) keepusing(arrests_pre complaints_pre force_pre any_prior_complaint tenure) nogen

* Residualise the outcome on the covariates
qui reg treat_affidavit `covars'
predict resid, resid

* Mean residual per investigator-year. yr_mark tags one row per group so each
* group is counted once in the summary; yr_ct is the group's row count.
sort inv_id intake_yr
egen inv_yr = group(inv_id intake_yr)
sort inv_yr
by inv_yr: egen yr_resid = mean(resid)
by inv_yr: gen yr_mark = _n==1
by inv_yr: gen yr_ct = _N

* Distribution across investigator-years with at least 30 rows
sum yr_resid if yr_mark & yr_ct>=30, d

* Same calculation per dyad, i.e. per (inv_id, nonsup_inv_id) pair
sort inv_id nonsup_inv_id
egen dyad = group(inv_id nonsup_inv_id)
sort dyad
by dyad: egen dyad_resid = mean(resid)
by dyad: gen dyad_mark = _n==1
by dyad: gen dyad_ct = _N

* Distribution across dyads with at least 30 rows
sum dyad_resid if dyad_mark & dyad_ct>=30, d

